from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=False)
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
print('modules are imported')
modules are imported
# Upstream location of the dataset.
# NOTE(review): this URL is not passed to read_csv below; only the local
# file is read. Presumably the local file is a downloaded copy - confirm.
dataset_url = 'https://raw.githubusercontent.com/datasets/covid-19/main/data/countries-aggregated.csv'
# Local path that is actually loaded into the DataFrame.
fname = 'data/countries-aggregated.csv'
df = pd.read_csv(fname)
# Snapshot of every row whose Date equals the string '2020-05-31'.
# NOTE(review): the variable name reads as 31 May 2021, but the filter
# selects 31 May 2020 - confirm which date is intended.
df_31May21 = df[df.Date == '2020-05-31']
df_31May21.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 130 | 2020-05-31 | Afghanistan | 15208 | 1328 | 258 |
| 658 | 2020-05-31 | Albania | 1137 | 872 | 33 |
| 1186 | 2020-05-31 | Algeria | 9394 | 5748 | 653 |
| 1714 | 2020-05-31 | Andorra | 764 | 694 | 51 |
| 2242 | 2020-05-31 | Angola | 86 | 18 | 4 |
df_31May21.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 130 | 2020-05-31 | Afghanistan | 15208 | 1328 | 258 |
| 658 | 2020-05-31 | Albania | 1137 | 872 | 33 |
| 1186 | 2020-05-31 | Algeria | 9394 | 5748 | 653 |
| 1714 | 2020-05-31 | Andorra | 764 | 694 | 51 |
| 2242 | 2020-05-31 | Angola | 86 | 18 | 4 |
df_31May21.tail()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 100450 | 2020-05-31 | Vietnam | 328 | 279 | 0 |
| 100978 | 2020-05-31 | West Bank and Gaza | 448 | 372 | 3 |
| 101506 | 2020-05-31 | Yemen | 323 | 14 | 80 |
| 102034 | 2020-05-31 | Zambia | 1057 | 779 | 7 |
| 102562 | 2020-05-31 | Zimbabwe | 178 | 29 | 4 |
df_31May21.shape
(195, 5)
df.shape
(102960, 5)
dfconf=df[df.Confirmed>0]
dfconf.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 33 | 2020-02-24 | Afghanistan | 1 | 0 | 0 |
| 34 | 2020-02-25 | Afghanistan | 1 | 0 | 0 |
| 35 | 2020-02-26 | Afghanistan | 1 | 0 | 0 |
| 36 | 2020-02-27 | Afghanistan | 1 | 0 | 0 |
| 37 | 2020-02-28 | Afghanistan | 1 | 0 | 0 |
dfconf.shape
(91970, 5)
dfconf[dfconf.Country=='Italy'].head(10)
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 44889 | 2020-01-31 | Italy | 2 | 0 | 0 |
| 44890 | 2020-02-01 | Italy | 2 | 0 | 0 |
| 44891 | 2020-02-02 | Italy | 2 | 0 | 0 |
| 44892 | 2020-02-03 | Italy | 2 | 0 | 0 |
| 44893 | 2020-02-04 | Italy | 2 | 0 | 0 |
| 44894 | 2020-02-05 | Italy | 2 | 0 | 0 |
| 44895 | 2020-02-06 | Italy | 2 | 0 | 0 |
| 44896 | 2020-02-07 | Italy | 3 | 0 | 0 |
| 44897 | 2020-02-08 | Italy | 3 | 0 | 0 |
| 44898 | 2020-02-09 | Italy | 3 | 0 | 0 |
# Animated world map: one frame per Date, countries coloured by the
# Confirmed column of dfconf.
fig = px.choropleth(
    dfconf,
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    animation_frame='Date',
)

# args[1] of the first button in the first update menu holds the
# frame/transition timing dictionaries used when the animation plays.
playback_options = fig.layout.updatemenus[0].buttons[0].args[1]
playback_options['frame']['duration'] = 30
playback_options['transition']['duration'] = 5

fig.update_geos(projection_type="equirectangular", visible=True, resolution=50)
fig.update_layout(
    title_text='Global Spread of Coronavirus',
    title_x=0.5,  # centre the title horizontally
    geo=dict(showframe=False, showcoastlines=False),
)
iplot(fig, show_link=False)
dfdeaths=df[df.Deaths>0]
dfdeaths.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 60 | 2020-03-22 | Afghanistan | 34 | 1 | 1 |
| 61 | 2020-03-23 | Afghanistan | 41 | 1 | 1 |
| 62 | 2020-03-24 | Afghanistan | 43 | 1 | 1 |
| 63 | 2020-03-25 | Afghanistan | 76 | 2 | 2 |
| 64 | 2020-03-26 | Afghanistan | 80 | 2 | 3 |
dfdeaths.shape
(81987, 5)
# Animated world map: one frame per Date, countries coloured by the
# Deaths column of dfdeaths.
figD = px.choropleth(
    dfdeaths,
    locations='Country',
    locationmode='country names',
    color='Deaths',
    animation_frame='Date',
)

# args[1] of the first button in the first update menu holds the
# frame/transition timing dictionaries used when the animation plays.
playback_options = figD.layout.updatemenus[0].buttons[0].args[1]
playback_options['frame']['duration'] = 30
playback_options['transition']['duration'] = 5

figD.update_geos(projection_type="equirectangular", visible=True, resolution=50)
figD.update_layout(
    title_text='Global Deaths of Coronavirus',
    title_x=0.5,  # centre the title horizontally
    geo=dict(showframe=False, showcoastlines=False),
)
iplot(figD, show_link=False)
df_china=df[df.Country == 'China']
df_china.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 19008 | 2020-01-22 | China | 548 | 28 | 17 |
| 19009 | 2020-01-23 | China | 643 | 30 | 18 |
| 19010 | 2020-01-24 | China | 920 | 36 | 26 |
| 19011 | 2020-01-25 | China | 1406 | 39 | 42 |
| 19012 | 2020-01-26 | China | 2075 | 49 | 56 |
let's select the columns that we need
df_china=df_china[['Date','Confirmed']]
df_china.head()
| Date | Confirmed | |
|---|---|---|
| 19008 | 2020-01-22 | 548 |
| 19009 | 2020-01-23 | 643 |
| 19010 | 2020-01-24 | 920 |
| 19011 | 2020-01-25 | 1406 |
| 19012 | 2020-01-26 | 2075 |
calculating the first derivative (day-over-day difference) of the Confirmed column
df_china['Infection Rate']=df_china['Confirmed'].diff()
df_china.head()
| Date | Confirmed | Infection Rate | |
|---|---|---|---|
| 19008 | 2020-01-22 | 548 | NaN |
| 19009 | 2020-01-23 | 643 | 95.0 |
| 19010 | 2020-01-24 | 920 | 277.0 |
| 19011 | 2020-01-25 | 1406 | 486.0 |
| 19012 | 2020-01-26 | 2075 | 669.0 |
fline = px.line(df_china, x='Date', y=['Confirmed', 'Infection Rate'])
iplot(fline,show_link=False)
df_china['Infection Rate'].max()
15136.0
df.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 0 | 0 | 0 |
| 1 | 2020-01-23 | Afghanistan | 0 | 0 | 0 |
| 2 | 2020-01-24 | Afghanistan | 0 | 0 | 0 |
| 3 | 2020-01-25 | Afghanistan | 0 | 0 | 0 |
| 4 | 2020-01-26 | Afghanistan | 0 | 0 | 0 |
countries=list(df['Country'].unique())
countries[0]
'Afghanistan'
# Maximum infection rate per country: the largest day-over-day increase in
# the cumulative Confirmed count.
# A single grouped pass replaces re-filtering the whole frame once for every
# country. sort=False keeps the groups in first-appearance order instead of
# sorting them alphabetically.
peak_daily_rise = (
    df.groupby('Country', sort=False)['Confirmed']
    .agg(lambda confirmed: confirmed.diff().max())
)

# Align the result to `countries` so both columns of df_MIR are guaranteed
# to be in the same order.
max_infection_rate = peak_daily_rise.reindex(countries).tolist()

df_MIR = pd.DataFrame({
    'Country': countries,
    'Max Infection Rate': max_infection_rate,
})
df_MIR.head()
| Country | Max Infection Rate | |
|---|---|---|
| 0 | Afghanistan | 5168.0 |
| 1 | Albania | 1239.0 |
| 2 | Algeria | 1133.0 |
| 3 | Andorra | 299.0 |
| 4 | Angola | 405.0 |
fbar = px.bar(df_MIR, x='Country', y='Max Infection Rate', color='Country', title='global maximum infection rate', log_y=True)
iplot(fbar, show_link = False)
A logarithmic y-axis (log_y=True) is used so that countries with low values are still visible as bars; note that this changes the scale of the y axis.
On 9 March 2020, the government of Italy under Prime Minister Giuseppe Conte imposed a national quarantine, restricting the movement of the population except for necessity, work, and health circumstances, in response to the growing pandemic of COVID-19 in the country. source
italy_lockdown_start_date = '2020-03-09'
italy_lockdown_a_month_later = '2020-04-09'
df.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 0 | 0 | 0 |
| 1 | 2020-01-23 | Afghanistan | 0 | 0 | 0 |
| 2 | 2020-01-24 | Afghanistan | 0 | 0 | 0 |
| 3 | 2020-01-25 | Afghanistan | 0 | 0 | 0 |
| 4 | 2020-01-26 | Afghanistan | 0 | 0 | 0 |
let's get data related to italy
# Take an independent copy of Italy's rows. New columns are assigned to this
# frame later on, and assigning into a bare boolean-mask slice of df makes
# pandas emit SettingWithCopyWarning.
df_italy = df[df.Country == 'Italy'].copy()
let's check the dataframe
df_italy.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 44880 | 2020-01-22 | Italy | 0 | 0 | 0 |
| 44881 | 2020-01-23 | Italy | 0 | 0 | 0 |
| 44882 | 2020-01-24 | Italy | 0 | 0 | 0 |
| 44883 | 2020-01-25 | Italy | 0 | 0 | 0 |
| 44884 | 2020-01-26 | Italy | 0 | 0 | 0 |
let's calculate the infection rate in Italy
df_italy['Infection Rate']=df_italy.Confirmed.diff()
df_italy.head()
/var/folders/43/4nqhk6qx3kxcwf85q5ncg9lm0000gn/T/ipykernel_11555/3001688291.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | |
|---|---|---|---|---|---|---|
| 44880 | 2020-01-22 | Italy | 0 | 0 | 0 | NaN |
| 44881 | 2020-01-23 | Italy | 0 | 0 | 0 | 0.0 |
| 44882 | 2020-01-24 | Italy | 0 | 0 | 0 | 0.0 |
| 44883 | 2020-01-25 | Italy | 0 | 0 | 0 | 0.0 |
| 44884 | 2020-01-26 | Italy | 0 | 0 | 0 | 0.0 |
ok! now let's do the visualization
FigIt=px.line(df_italy, x='Date', y='Infection Rate', title="Before and After lockdown in Italy")
#FigIt.show()
iplot(FigIt, show_link = False)
# Italy's daily infection rate with a red vertical marker on the day the
# lockdown started.
FigIt2 = px.line(
    df_italy,
    x='Date',
    y='Infection Rate',
    title="Before and After lockdown in Italy",
)

# The marker spans from 0 up to the highest observed infection rate, and the
# label is placed at that top end.
italy_peak_rate = df_italy['Infection Rate'].max()
FigIt2.add_shape(
    type="line",
    x0=italy_lockdown_start_date,
    y0=0,
    x1=italy_lockdown_start_date,
    y1=italy_peak_rate,
    line=dict(color='red', width=2),
)
FigIt2.add_annotation(
    x=italy_lockdown_start_date,
    y=italy_peak_rate,
    text='Starting Date of Lockdown',
)
iplot(FigIt2, show_link=False)
# Italy's daily infection rate with red vertical markers on the lockdown
# start date and one month after it.
FigIt3 = px.line(
    df_italy,
    x='Date',
    y='Infection Rate',
    title="Before and After lockdown in Italy",
)

# Both markers span from 0 up to the highest observed infection rate.
italy_peak_rate = df_italy['Infection Rate'].max()

# (marker date, label text, y position of the label)
italy_markers = (
    (italy_lockdown_start_date, 'Starting Date of Lockdown', italy_peak_rate),
    (italy_lockdown_a_month_later, 'One month post Lockdown', 4000),
)
for marker_date, marker_text, label_y in italy_markers:
    FigIt3.add_shape(
        type="line",
        x0=marker_date,
        y0=0,
        x1=marker_date,
        y1=italy_peak_rate,
        line=dict(color='red', width=2),
    )
    FigIt3.add_annotation(x=marker_date, y=label_y, text=marker_text)

iplot(FigIt3, show_link=False)
df_italy.head()
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | |
|---|---|---|---|---|---|---|
| 44880 | 2020-01-22 | Italy | 0 | 0 | 0 | NaN |
| 44881 | 2020-01-23 | Italy | 0 | 0 | 0 | 0.0 |
| 44882 | 2020-01-24 | Italy | 0 | 0 | 0 | 0.0 |
| 44883 | 2020-01-25 | Italy | 0 | 0 | 0 | 0.0 |
| 44884 | 2020-01-26 | Italy | 0 | 0 | 0 | 0.0 |
let's calculate the number of new deaths day by day (the death rate)
df_italy['Death Rate']=df_italy.Deaths.diff()
/var/folders/43/4nqhk6qx3kxcwf85q5ncg9lm0000gn/T/ipykernel_11555/834131105.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
let's check the dataframe again
df_italy.head()
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | Death Rate | |
|---|---|---|---|---|---|---|---|
| 44880 | 2020-01-22 | Italy | 0 | 0 | 0 | NaN | NaN |
| 44881 | 2020-01-23 | Italy | 0 | 0 | 0 | 0.0 | 0.0 |
| 44882 | 2020-01-24 | Italy | 0 | 0 | 0 | 0.0 | 0.0 |
| 44883 | 2020-01-25 | Italy | 0 | 0 | 0 | 0.0 | 0.0 |
| 44884 | 2020-01-26 | Italy | 0 | 0 | 0 | 0.0 | 0.0 |
now let's plot a line chart to compare the impact of the COVID-19 national lockdown on the spread of the virus (infection rate) and on the daily death rate
figit4=px.line(df_italy, x='Date', y=['Infection Rate', 'Death Rate'])
#figit4.show()
iplot(figit4,show_link=False)
df_italy['N Infection Rate']=df_italy['Infection Rate']/df_italy['Infection Rate'].max()
df_italy['N Death Rate']=df_italy['Death Rate']/df_italy['Death Rate'].max()
/var/folders/43/4nqhk6qx3kxcwf85q5ncg9lm0000gn/T/ipykernel_11555/3675118474.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /var/folders/43/4nqhk6qx3kxcwf85q5ncg9lm0000gn/T/ipykernel_11555/3675118474.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
figf= px.line(df_italy, x='Date', y=['N Infection Rate', 'N Death Rate'])
#figf.show()
iplot(figf, show_link=False)
# Italy's normalised infection and death rates with yellow vertical markers
# on the lockdown start date and one month after it.
figf1 = px.line(
    df_italy,
    x='Date',
    y=['N Infection Rate', 'N Death Rate'],
    title="Infection Rate and Death rate pre and post lockdown",
)

# Both markers span from 0 up to the maximum of the normalised infection rate.
italy_norm_peak = df_italy['N Infection Rate'].max()

# (marker date, label text, y position of the label)
italy_norm_markers = (
    (italy_lockdown_start_date, 'Starting Date of Lockdown', italy_norm_peak),
    (italy_lockdown_a_month_later, 'One month post Lockdown', 0),
)
for marker_date, marker_text, label_y in italy_norm_markers:
    figf1.add_shape(
        type="line",
        x0=marker_date,
        y0=0,
        x1=marker_date,
        y1=italy_norm_peak,
        line=dict(color='yellow', width=2),
    )
    figf1.add_annotation(x=marker_date, y=label_y, text=marker_text)

iplot(figf1, show_link=False)
Lockdown started in Freiburg, Baden-Württemberg and Bavaria on 20 March 2020. Three days later, it was expanded to the whole of Germany.
Germany_lockdown_start_date = '2020-03-23'
Germany_lockdown_a_month_later = '2020-04-23'
let's select the data related to Germany
# Take an independent copy of Germany's rows. New columns are assigned to
# this frame later on, and assigning into a bare boolean-mask slice of df
# makes pandas emit SettingWithCopyWarning.
df_germany = df[df.Country == 'Germany'].copy()
let's check the dataframe
df_germany.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 34848 | 2020-01-22 | Germany | 0 | 0 | 0 |
| 34849 | 2020-01-23 | Germany | 0 | 0 | 0 |
| 34850 | 2020-01-24 | Germany | 0 | 0 | 0 |
| 34851 | 2020-01-25 | Germany | 0 | 0 | 0 |
| 34852 | 2020-01-26 | Germany | 0 | 0 | 0 |
let's calculate the daily infection rate and death rate in Germany
df_germany['Infection Rate']=df_germany.Confirmed.diff()
df_germany['Death Rate']=df_germany.Deaths.diff()
/var/folders/43/4nqhk6qx3kxcwf85q5ncg9lm0000gn/T/ipykernel_11555/2770182711.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /var/folders/43/4nqhk6qx3kxcwf85q5ncg9lm0000gn/T/ipykernel_11555/2770182711.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
let's check it again
df_germany.head()
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | Death Rate | |
|---|---|---|---|---|---|---|---|
| 34848 | 2020-01-22 | Germany | 0 | 0 | 0 | NaN | NaN |
| 34849 | 2020-01-23 | Germany | 0 | 0 | 0 | 0.0 | 0.0 |
| 34850 | 2020-01-24 | Germany | 0 | 0 | 0 | 0.0 | 0.0 |
| 34851 | 2020-01-25 | Germany | 0 | 0 | 0 | 0.0 | 0.0 |
| 34852 | 2020-01-26 | Germany | 0 | 0 | 0 | 0.0 | 0.0 |
let's normalize the infection rate and death rate in Germany by dividing each by its maximum value
df_germany['N Infection Rate']=df_germany['Infection Rate']/df_germany['Infection Rate'].max()
df_germany['N Death Rate']=df_germany['Death Rate']/df_germany['Death Rate'].max()
/var/folders/43/4nqhk6qx3kxcwf85q5ncg9lm0000gn/T/ipykernel_11555/1837195417.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /var/folders/43/4nqhk6qx3kxcwf85q5ncg9lm0000gn/T/ipykernel_11555/1837195417.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
let's check the dataframe
df_germany.head()
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | Death Rate | N Infection Rate | N Death Rate | |
|---|---|---|---|---|---|---|---|---|---|
| 34848 | 2020-01-22 | Germany | 0 | 0 | 0 | NaN | NaN | NaN | NaN |
| 34849 | 2020-01-23 | Germany | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 34850 | 2020-01-24 | Germany | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 34851 | 2020-01-25 | Germany | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 34852 | 2020-01-26 | Germany | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 |
now let's plot the line chart
# Germany's normalised infection and death rates with yellow vertical
# markers on the lockdown start date and one month after it.
figg = px.line(
    df_germany,
    x='Date',
    y=['N Infection Rate', 'N Death Rate'],
    title="Infection Rate and Death rate in Germany pre and post lockdown",
)

# Both markers span from 0 up to the maximum of the normalised infection rate.
germany_norm_peak = df_germany['N Infection Rate'].max()

# (marker date, label text, y position of the label)
germany_markers = (
    (Germany_lockdown_start_date, 'Starting Date of Lockdown', germany_norm_peak),
    (Germany_lockdown_a_month_later, 'One month post Lockdown', 0),
)
for marker_date, marker_text, label_y in germany_markers:
    figg.add_shape(
        type="line",
        x0=marker_date,
        y0=0,
        x1=marker_date,
        y1=germany_norm_peak,
        line=dict(color='yellow', width=2),
    )
    figg.add_annotation(x=marker_date, y=label_y, text=marker_text)

iplot(figg, show_link=False)